EDA is an iterative cycle that helps you understand what your data says. When you do EDA, you:
Generate questions about your data
Search for answers by visualising, transforming, and/or modeling your data
Use what you learn to refine your questions and/or generate new questions
EDA is an important part of any data analysis. You can use EDA to make discoveries about the world; or you can use EDA to ensure the quality of your data, asking questions about whether the data meets your standards or not. (Posit Primers: EDA)
# Download the chosen indicators. The argument of WDI() is `indicator`
# (singular); `indicators` is rejected as an unused argument.
# The names given inside c() become the column names of the result.
df_dataframe_name <- WDI(
  indicator = c(name1 = "Indicator Code 1", name2 = "Indicator Code 2"),
  extra = TRUE
)
Write and read:
# Save the downloaded data as a CSV file, then read it back from that file.
write_csv(df_dataframe_name, "data/dataframe_name.csv")
df_dataframe_name <- read_csv("data/dataframe_name.csv")
Inspect the data with head(), str(), and summary(), or
simply type df_dataframe_name. See also the Environment tab of
RStudio.
# Keep rows equal to one value, matching any of several values,
# or different from one value.
df_dataframe_name |> filter(var == "value")
df_dataframe_name |> filter(var %in% c("value_1", ... , "value_n"))
df_dataframe_name |> filter(var != "value")
# Drop the rows in which var is missing.
df_dataframe_name |> drop_na(var)
# Add a new column computed from existing columns.
df_dataframe_name |> mutate(var_new = var1 * var2)
# arrange()
# Sort rows by var in ascending order.
df_dataframe_name |> arrange(var)
# Sort rows by var in descending order; the helper is desc().
df_dataframe_name |> arrange(desc(var))
Visualizing using ggplot() + geom_*()
What type of variation occurs within my variables?
What type of covariation occurs between my variables?
# Line plots: each indicator against year.
transformed_data |>
  ggplot(aes(year, name1)) +
  geom_line()
transformed_data |>
  ggplot(aes(year, name2)) +
  geom_line()
# Scatter plots of name2 against name1: plain, then with a log10 x-axis.
transformed_data |>
  ggplot(aes(name1, name2)) +
  geom_point()
transformed_data |>
  ggplot(aes(name1, name2)) +
  geom_point() +
  scale_x_log10()
# The same scatter plots with a linear-model fit drawn without its
# confidence band (se = FALSE).
transformed_data |>
  ggplot(aes(name1, name2)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
transformed_data |>
  ggplot(aes(name1, name2)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  scale_x_log10()
# Histogram of a single indicator.
transformed_data |>
  ggplot(aes(name1)) +
  geom_histogram()
categorical_var: factor(year),
income, region
# Box plot of name1 for each level of a categorical variable.
transformed_data |>
  ggplot(aes(categorical_var, name1)) +
  geom_boxplot()
library(tidyverse)
library(WDI)
We study the relationship between CO2 emissions per capita and GDP per capita using the following two World Development Indicators.
# Download the two indicators under the short column names co2pcap and
# gdppcap, requesting the extra columns (extra = TRUE).
df_co2gdp <- WDI(
  indicator = c(
    co2pcap = "EN.ATM.CO2E.PC",
    gdppcap = "NY.GDP.PCAP.PP.KD"
  ),
  extra = TRUE
)
# Keep a copy on disk and continue from the re-read CSV file.
write_csv(df_co2gdp, "data/co2gdp.csv")
df_co2gdp <- read_csv("data/co2gdp.csv")
Rows: 16758 Columns: 14── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (7): country, iso2c, iso3c, region, capital, income, lending
dbl (5): year, co2pcap, gdppcap, longitude, latitude
lgl (1): status
date (1): lastupdated
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# CO2 per capita over time for the row group whose country is "World".
COUNTRY <- "World"
df_co2gdp |>
  filter(country == COUNTRY) |>
  drop_na(co2pcap) |>
  ggplot(aes(year, co2pcap)) +
  geom_line() +
  labs(
    title = expression(paste(CO[2], " per capita of the World")),
    y = expression(paste(CO[2], " per capita in tons"))
  )
# ISO 3166-1 alpha-2 codes of the seven countries named in the subtitle.
# India is "IN"; "ID" is Indonesia and did not match the subtitle.
ISO2C <- c("JP", "CN", "IN", "GB", "US", "DE", "FR")
# CO2 per capita over time, one coloured line per country.
df_co2gdp |>
  filter(iso2c %in% ISO2C) |>
  drop_na(co2pcap) |>
  ggplot(aes(year, co2pcap, col = iso2c)) +
  geom_line() +
  labs(
    title = expression(paste(CO[2], " per capita of seven countries with large GDP")),
    subtitle = "China, Germany, France, United Kingdom, India, Japan, United States",
    y = expression(paste(CO[2], " per capita in tons"))
  )
# GDP per capita over time for the row group whose country is "World".
COUNTRY <- "World"
df_co2gdp |>
  filter(country == COUNTRY) |>
  drop_na(gdppcap) |>
  ggplot(aes(year, gdppcap)) +
  geom_line() +
  labs(title = "GDP per capita of the World")
# ISO 3166-1 alpha-2 codes of the seven countries named in the subtitle.
# India is "IN"; "ID" is Indonesia and did not match the subtitle.
ISO2C <- c("JP", "CN", "IN", "GB", "US", "DE", "FR")
# GDP per capita over time, one coloured line per country.
df_co2gdp |>
  filter(iso2c %in% ISO2C) |>
  drop_na(gdppcap) |>
  ggplot(aes(year, gdppcap, col = iso2c)) +
  geom_line() +
  labs(
    title = "GDP per capita of seven countries with large GDP",
    subtitle = "China, Germany, France, United Kingdom, India, Japan, United States",
    y = "GDP per capita PPP",
    caption = "constant 2017 international usd"
  )
# 2020 rows excluding the "Aggregates" region, sorted by CO2 per capita:
# largest first, then smallest first.
df_co2gdp |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(co2pcap) |>
  arrange(desc(co2pcap))
df_co2gdp |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(co2pcap) |>
  arrange(co2pcap)
Observations and Questions:
Top 10 countries of CO2 emission per capita:
Lowest 10 countries of CO2 emission per capita:
# 2020 rows excluding the "Aggregates" region, sorted by GDP per capita:
# largest first, then smallest first.
df_co2gdp |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(gdppcap) |>
  arrange(desc(gdppcap))
df_co2gdp |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(gdppcap) |>
  arrange(gdppcap)
# INCOME fixes the display order of the income levels. It has to exist
# before this first use; the vector is the same one the school-enrollment
# section assigns later in the document.
INCOME <- c(
  "Low income", "Low & middle income", "Lower middle income",
  "Middle income", "Upper middle income", "High income"
)
# Histogram of 2020 CO2 per capita on a log10 x-axis, filled by income level.
# Non-positive values have no log10, so they are removed explicitly,
# as the other log-scale plots of co2pcap do.
df_co2gdp |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(co2pcap) |>
  filter(co2pcap > 0, income != "Not classified") |>
  ggplot(aes(co2pcap, fill = factor(income, levels = INCOME))) +
  geom_histogram(bins = 15, col = "black", linewidth = 0.1) +
  scale_x_log10() +
  labs(title = "Histogram of CO2 per capita in 2020", fill = "")
# Histograms of positive CO2 per capita for 1990, 2000, 2010 and 2020,
# one facet per year, log10 x-axis.
df_co2gdp |>
  filter(year %in% c(1990, 2000, 2010, 2020), region != "Aggregates") |>
  drop_na(co2pcap) |>
  filter(co2pcap > 0, income != "Not classified") |>
  ggplot(aes(co2pcap, fill = factor(year))) +
  geom_histogram(bins = 15, col = "black", linewidth = 0.1) +
  scale_x_log10() +
  facet_wrap(~year) +
  labs(title = "Histogram of CO2 per capita in 1990, 2000, 2010, 2020", fill = "")
# The same four years as horizontal box plots, one box per year.
df_co2gdp |>
  filter(year %in% c(1990, 2000, 2010, 2020), region != "Aggregates") |>
  drop_na(co2pcap) |>
  filter(co2pcap > 0, income != "Not classified") |>
  ggplot(aes(co2pcap, factor(year), fill = factor(year))) +
  geom_boxplot() +
  scale_x_log10() +
  labs(y = "") +
  theme(legend.position = "none")
# 2020 box plots by income level, ordered by INCOME.
df_co2gdp |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(co2pcap) |>
  filter(co2pcap > 0, income != "Not classified") |>
  ggplot(aes(co2pcap, factor(income, levels = INCOME), fill = income)) +
  geom_boxplot() +
  scale_x_log10() +
  labs(title = "CO2 per capita by income level", y = "", fill = "") +
  theme(legend.position = "none")
# 2020 box plots by region.
df_co2gdp |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(co2pcap) |>
  filter(co2pcap > 0) |>
  ggplot(aes(co2pcap, region, fill = region)) +
  geom_boxplot() +
  scale_x_log10() +
  labs(title = "CO2 per capita by region", y = "", fill = "") +
  theme(legend.position = "none")
# Histogram of 2020 GDP per capita on a log10 x-axis, filled by income level.
df_co2gdp |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(gdppcap) |>
  filter(income != "Not classified") |>
  ggplot(aes(gdppcap, fill = factor(income, levels = INCOME))) +
  geom_histogram(bins = 15, col = "black", linewidth = 0.1) +
  scale_x_log10() +
  labs(title = "Histogram of GDP per capita in 2020", fill = "")
# Histograms of positive GDP per capita for 1990, 2000, 2010 and 2020,
# one facet per year, legend hidden.
df_co2gdp |>
  filter(year %in% c(1990, 2000, 2010, 2020), region != "Aggregates") |>
  drop_na(gdppcap) |>
  filter(gdppcap > 0, income != "Not classified") |>
  ggplot(aes(gdppcap, fill = factor(year))) +
  geom_histogram(bins = 15, col = "black", linewidth = 0.1) +
  scale_x_log10() +
  facet_wrap(~year) +
  labs(title = "Histogram of GDP per capita in 1990, 2000, 2010, 2020", fill = "") +
  theme(legend.position = "none")
# The same four years as horizontal box plots, one box per year.
df_co2gdp |>
  filter(year %in% c(1990, 2000, 2010, 2020), region != "Aggregates") |>
  drop_na(gdppcap) |>
  filter(gdppcap > 0, income != "Not classified") |>
  ggplot(aes(gdppcap, factor(year), fill = factor(year))) +
  geom_boxplot() +
  scale_x_log10() +
  labs(y = "") +
  theme(legend.position = "none")
# 2020 box plots by income level, ordered by INCOME.
df_co2gdp |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(gdppcap) |>
  filter(gdppcap > 0, income != "Not classified") |>
  ggplot(aes(gdppcap, factor(income, levels = INCOME), fill = income)) +
  geom_boxplot() +
  scale_x_log10() +
  labs(title = "GDP per capita by income level", y = "", fill = "") +
  theme(legend.position = "none")
# 2020 box plots by region.
df_co2gdp |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(gdppcap) |>
  filter(gdppcap > 0) |>
  ggplot(aes(gdppcap, region, fill = region)) +
  geom_boxplot() +
  scale_x_log10() +
  labs(title = "GDP per capita by region", y = "", fill = "") +
  theme(legend.position = "none")
# 2020 scatter plot of CO2 per capita against GDP per capita, both axes on a
# log10 scale, points coloured by region, with one linear fit over all points.
df_co2gdp |>
  filter(year == 2020) |>
  drop_na(gdppcap, co2pcap) |>
  ggplot(aes(gdppcap, co2pcap)) +
  geom_point(aes(col = region)) +
  geom_smooth(method = "lm", formula = 'y~x', se = FALSE) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "GDP per capita vs CO2 per capita",
    x = "GDP per capita",
    y = expression(paste(CO[2], " per capita in tons"))
  )
# Linear regression of log10(co2pcap) on log10(gdppcap) for the same rows.
df_co2gdp |>
  filter(year == 2020) |>
  drop_na(gdppcap, co2pcap) |>
  lm(log10(co2pcap) ~ log10(gdppcap), data = _) |>
  summary()
Call:
lm(formula = log10(co2pcap) ~ log10(gdppcap), data = drop_na(filter(df_co2gdp,
year == 2020), gdppcap, co2pcap))
Residuals:
Min 1Q Median 3Q Max
-0.60778 -0.15660 -0.00651 0.16129 0.59437
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.31545 0.13386 -32.24 <2e-16 ***
log10(gdppcap) 1.13831 0.03288 34.62 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.2362 on 228 degrees of freedom
Multiple R-squared: 0.8402, Adjusted R-squared: 0.8395
F-statistic: 1199 on 1 and 228 DF, p-value: < 2.2e-16
# Search the indicator names for the pattern "school enrollment.*(% gross)".
WDIsearch(string = "school enrollment.*(% gross)", field = "name", short = FALSE)
# Download the three indicators under the short names sec, ter and gdppcap.
df_sec_ter_gdp <- WDI(
  indicator = c(
    sec = "SE.SEC.ENRR",
    ter = "SE.TER.ENRR",
    gdppcap = "NY.GDP.PCAP.PP.KD"
  ),
  extra = TRUE
)
# Write and re-read the same object. The original used df_secgdp here,
# which is never created, while every later chunk uses df_sec_ter_gdp.
write_csv(df_sec_ter_gdp, "data/sec_ter_gdp.csv")
df_sec_ter_gdp <- read_csv("data/sec_ter_gdp.csv")
Rows: 16758 Columns: 14── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (7): country, iso2c, iso3c, region, capital, income, lending
dbl (5): year, sec, gdppcap, longitude, latitude
lgl (1): status
date (1): lastupdated
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Secondary (blue) and tertiary (red) enrollment over time for "World".
COUNTRY <- "World"
df_sec_ter_gdp |>
  filter(country == COUNTRY) |>
  drop_na(sec, ter) |>
  ggplot() +
  geom_line(aes(year, sec), col = "blue") +
  geom_line(aes(year, ter), col = "red") +
  labs(
    title = "School enrollment; Secondary and Tertiary",
    subtitle = "secondary in blue and tertiary in red",
    y = ""
  )
# Names used both to pick rows by country and to order factor levels.
INCOME <- c(
  "Low income", "Low & middle income", "Lower middle income",
  "Middle income", "Upper middle income", "High income"
)
# The same two series for each entry of INCOME, distinguished by line type.
df_sec_ter_gdp |>
  filter(country %in% INCOME) |>
  drop_na(sec, ter) |>
  ggplot(aes(linetype = factor(country, levels = INCOME))) +
  geom_line(aes(year, sec), col = "blue") +
  geom_line(aes(year, ter), col = "red") +
  ylim(c(0, 110)) +
  labs(
    title = "School enrollment; Secondary and Tertiary",
    subtitle = "secondary in blue and tertiary in red",
    linetype = "Income Levels",
    y = ""
  )
# 2020 enrollment against GDP per capita: secondary in blue, tertiary in red.
df_sec_ter_gdp |>
  filter(year == 2020) |>
  drop_na(sec, ter, gdppcap) |>
  ggplot() +
  geom_point(aes(gdppcap, sec), col = "blue") +
  geom_point(aes(gdppcap, ter), col = "red") +
  labs(
    title = "School enrollment; Secondary and Tertiary vs GDP per capita",
    subtitle = "secondary in blue and tertiary in red",
    y = ""
  )
# The same plot with a log10 x-axis.
df_sec_ter_gdp |>
  filter(year == 2020) |>
  drop_na(sec, ter, gdppcap) |>
  ggplot() +
  geom_point(aes(gdppcap, sec), col = "blue") +
  geom_point(aes(gdppcap, ter), col = "red") +
  scale_x_log10() +
  labs(
    title = "School enrollment; Secondary and Tertiary vs GDP per capita in log10 scale",
    subtitle = "secondary in blue and tertiary in red",
    y = ""
  )
# The log10 plot again, with one linear fit per series.
df_sec_ter_gdp |>
  filter(year == 2020) |>
  drop_na(sec, ter, gdppcap) |>
  ggplot() +
  geom_point(aes(gdppcap, sec), col = "blue") +
  geom_point(aes(gdppcap, ter), col = "red") +
  geom_smooth(aes(gdppcap, sec), col = "blue", method = "lm", formula = 'y~x', se = FALSE) +
  geom_smooth(aes(gdppcap, ter), col = "red", method = "lm", formula = 'y~x', se = FALSE) +
  scale_x_log10() +
  labs(
    title = "School enrollment; Secondary and Tertiary vs GDP per capita in log10 scale",
    subtitle = "secondary in blue and tertiary in red with regression lines",
    y = ""
  )
# Linear regression of secondary enrollment on log10(gdppcap), 2020 rows.
df_sec_ter_gdp |>
  filter(year == 2020) |>
  drop_na(gdppcap, sec) |>
  lm(sec ~ log10(gdppcap), data = _) |>
  summary()
Call:
lm(formula = sec ~ log10(gdppcap), data = drop_na(filter(df_sec_ter_gdp,
year == 2020), gdppcap, sec))
Residuals:
Min 1Q Median 3Q Max
-53.777 -10.846 -1.173 9.006 66.996
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -102.994 11.933 -8.631 6.38e-15 ***
log10(gdppcap) 46.088 2.841 16.222 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.64 on 157 degrees of freedom
Multiple R-squared: 0.6263, Adjusted R-squared: 0.624
F-statistic: 263.2 on 1 and 157 DF, p-value: < 2.2e-16
# Linear regression of tertiary enrollment on log10(gdppcap), 2020 rows.
df_sec_ter_gdp |>
  filter(year == 2020) |>
  drop_na(gdppcap, ter) |>
  lm(ter ~ log10(gdppcap), data = _) |>
  summary()
Call:
lm(formula = ter ~ log10(gdppcap), data = drop_na(filter(df_sec_ter_gdp,
year == 2020), gdppcap, ter))
Residuals:
Min 1Q Median 3Q Max
-72.696 -8.388 -0.808 8.589 89.657
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -159.817 13.877 -11.52 <2e-16 ***
log10(gdppcap) 49.861 3.303 15.09 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 19.18 on 157 degrees of freedom
Multiple R-squared: 0.592, Adjusted R-squared: 0.5894
F-statistic: 227.8 on 1 and 157 DF, p-value: < 2.2e-16
# 2020 secondary enrollment by region.
df_sec_ter_gdp |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(sec, region) |>
  ggplot(aes(sec, region, fill = region)) +
  geom_boxplot() +
  labs(x = "School enrollment, secondary (% gross)", y = "") +
  theme(legend.position = "none")
# 2020 secondary enrollment by income level, ordered by INCOME.
# The title previously read "Seconary".
df_sec_ter_gdp |>
  filter(year == 2020, income != "Aggregates") |>
  drop_na(sec, income) |>
  ggplot(aes(sec, factor(income, levels = INCOME), fill = income)) +
  geom_boxplot() +
  labs(
    title = "Secondary education: School enrollment by income level",
    x = "School enrollment, secondary (% gross)",
    y = ""
  ) +
  theme(legend.position = "none")
# 2020 tertiary enrollment by region.
df_sec_ter_gdp |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(ter, region) |>
  ggplot(aes(ter, region, fill = region)) +
  geom_boxplot() +
  labs(x = "School enrollment, tertiary (% gross)", y = "") +
  theme(legend.position = "none")
# 2020 tertiary enrollment by income level, ordered by INCOME.
df_sec_ter_gdp |>
  filter(year == 2020, income != "Aggregates") |>
  drop_na(ter, income) |>
  ggplot(aes(ter, factor(income, levels = INCOME), fill = income)) +
  geom_boxplot() +
  labs(
    title = "Tertiary education: School enrollment by income level",
    x = "School enrollment, tertiary (% gross)",
    y = ""
  ) +
  theme(legend.position = "none")
Observations
We study …..
# Indicator codes to download; replace these with your own choices.
chosen_indicator_1 <- "EN.ATM.CO2E.PC"
short_name_1 <- "co2pcap"
chosen_indicator_2 <- "NY.GDP.PCAP.PP.KD"
short_name_2 <- "gdppcap"
# NOTE: the names on the left of `=` inside c() are taken literally, so the
# downloaded columns are called short_name_1 and short_name_2 (as the column
# specification printed by read_csv shows). The short_name_* variables
# assigned above are not used by this call; the rest of this template refers
# to the columns as short_name_1 and short_name_2.
df_yourdata <- WDI(indicator = c(short_name_1 = chosen_indicator_1, short_name_2 = chosen_indicator_2),
extra = TRUE)
# Save the download as a CSV file and continue from the re-read copy.
write_csv(df_yourdata, "data/yourdata.csv")
df_yourdata <- read_csv("data/yourdata.csv")
Rows: 16758 Columns: 14── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (7): country, iso2c, iso3c, region, capital, income, lending
dbl (5): year, short_name_1, short_name_2, longitude, latitude
lgl (1): status
date (1): lastupdated
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# First indicator over time for "World"; fill in the title and y label.
COUNTRY <- "World"
df_yourdata |>
  filter(country == COUNTRY) |>
  drop_na(short_name_1) |>
  ggplot(aes(year, short_name_1)) +
  geom_line() +
  labs(title = "", y = "")
Observations and Questions:
# ISO 3166-1 alpha-2 codes of the seven countries named in the subtitle.
# India is "IN"; "ID" is Indonesia and did not match the subtitle.
ISO2C <- c("JP", "CN", "IN", "GB", "US", "DE", "FR")
# First indicator over time, one coloured line per country.
df_yourdata |>
  filter(iso2c %in% ISO2C) |>
  drop_na(short_name_1) |>
  ggplot(aes(year, short_name_1, col = iso2c)) +
  geom_line() +
  labs(
    title = "",
    subtitle = "China, Germany, France, United Kingdom, India, Japan, United States",
    y = ""
  )
Observations and Questions:
# Second indicator over time for "World"; fill in the title.
COUNTRY <- "World"
df_yourdata |>
  filter(country == COUNTRY) |>
  drop_na(short_name_2) |>
  ggplot(aes(year, short_name_2)) +
  geom_line() +
  labs(title = "")
Observations and Questions:
# ISO 3166-1 alpha-2 codes of the seven countries named in the subtitle.
# India is "IN"; "ID" is Indonesia and did not match the subtitle.
ISO2C <- c("JP", "CN", "IN", "GB", "US", "DE", "FR")
# Second indicator over time, one coloured line per country.
df_yourdata |>
  filter(iso2c %in% ISO2C) |>
  drop_na(short_name_2) |>
  ggplot(aes(year, short_name_2, col = iso2c)) +
  geom_line() +
  labs(
    title = "",
    subtitle = "China, Germany, France, United Kingdom, India, Japan, United States",
    y = "",
    caption = ""
  )
Observations and Questions:
# 2020 rows excluding the "Aggregates" region, sorted by the first indicator:
# largest first, then smallest first.
df_yourdata |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(short_name_1) |>
  arrange(desc(short_name_1))
df_yourdata |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(short_name_1) |>
  arrange(short_name_1)
Observations and Questions:
# 2020 rows excluding the "Aggregates" region, largest second indicator first.
df_yourdata |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(short_name_2) |>
  arrange(desc(short_name_2))
Observations and Questions:
# 2020 rows excluding the "Aggregates" region, smallest second indicator first.
df_yourdata |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(short_name_2) |>
  arrange(short_name_2)
Observations and Questions:
# Histogram of the first indicator in 2020 on a log10 x-axis,
# filled by income level in INCOME order.
df_yourdata |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(short_name_1) |>
  filter(income != "Not classified") |>
  ggplot(aes(short_name_1, fill = factor(income, levels = INCOME))) +
  geom_histogram(bins = 15, col = "black", linewidth = 0.1) +
  scale_x_log10() +
  labs(title = "", fill = "")
Observations and Questions:
# Histograms of the positive first-indicator values for 1990, 2000, 2010
# and 2020, one facet per year.
df_yourdata |>
  filter(year %in% c(1990, 2000, 2010, 2020), region != "Aggregates") |>
  drop_na(short_name_1) |>
  filter(short_name_1 > 0, income != "Not classified") |>
  ggplot(aes(short_name_1, fill = factor(year))) +
  geom_histogram(bins = 15, col = "black", linewidth = 0.1) +
  scale_x_log10() +
  facet_wrap(~year) +
  labs(title = "", fill = "")
Observations and Questions:
# Box plots of the first indicator, one box per year, log10 x-axis.
df_yourdata |>
  filter(year %in% c(1990, 2000, 2010, 2020), region != "Aggregates") |>
  drop_na(short_name_1) |>
  filter(short_name_1 > 0, income != "Not classified") |>
  ggplot(aes(short_name_1, factor(year), fill = factor(year))) +
  geom_boxplot() +
  scale_x_log10() +
  labs(y = "") +
  theme(legend.position = "none")
Observations and Questions:
# 2020 box plots of the first indicator by income level, ordered by INCOME.
df_yourdata |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(short_name_1) |>
  filter(short_name_1 > 0, income != "Not classified") |>
  ggplot(aes(short_name_1, factor(income, levels = INCOME), fill = income)) +
  geom_boxplot() +
  scale_x_log10() +
  labs(title = "", y = "", fill = "") +
  theme(legend.position = "none")
Observations and Questions:
# 2020 box plots of the first indicator by region.
df_yourdata |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(short_name_1) |>
  filter(short_name_1 > 0) |>
  ggplot(aes(short_name_1, region, fill = region)) +
  geom_boxplot() +
  scale_x_log10() +
  labs(title = "", y = "", fill = "") +
  theme(legend.position = "none")
Observations and Questions:
# Histogram of the second indicator in 2020 on a log10 x-axis,
# filled by income level in INCOME order.
df_yourdata |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(short_name_2) |>
  filter(income != "Not classified") |>
  ggplot(aes(short_name_2, fill = factor(income, levels = INCOME))) +
  geom_histogram(bins = 15, col = "black", linewidth = 0.1) +
  scale_x_log10() +
  labs(title = "", fill = "")
# Histograms of the positive second-indicator values for 1990, 2000, 2010
# and 2020, one facet per year, legend hidden.
df_yourdata |>
  filter(year %in% c(1990, 2000, 2010, 2020), region != "Aggregates") |>
  drop_na(short_name_2) |>
  filter(short_name_2 > 0, income != "Not classified") |>
  ggplot(aes(short_name_2, fill = factor(year))) +
  geom_histogram(bins = 15, col = "black", linewidth = 0.1) +
  scale_x_log10() +
  facet_wrap(~year) +
  labs(title = "", fill = "") +
  theme(legend.position = "none")
Observations and Questions:
# Box plots of the second indicator, one box per year, log10 x-axis.
df_yourdata |>
  filter(year %in% c(1990, 2000, 2010, 2020), region != "Aggregates") |>
  drop_na(short_name_2) |>
  filter(short_name_2 > 0, income != "Not classified") |>
  ggplot(aes(short_name_2, factor(year), fill = factor(year))) +
  geom_boxplot() +
  scale_x_log10() +
  labs(y = "") +
  theme(legend.position = "none")
Observations and Questions:
# 2020 box plots of the second indicator by income level, ordered by INCOME.
df_yourdata |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(short_name_2) |>
  filter(short_name_2 > 0, income != "Not classified") |>
  ggplot(aes(short_name_2, factor(income, levels = INCOME), fill = income)) +
  geom_boxplot() +
  scale_x_log10() +
  labs(title = "", y = "", fill = "") +
  theme(legend.position = "none")
Observations and Questions:
# 2020 box plots of the second indicator by region.
df_yourdata |>
  filter(year == 2020, region != "Aggregates") |>
  drop_na(short_name_2) |>
  filter(short_name_2 > 0) |>
  ggplot(aes(short_name_2, region, fill = region)) +
  geom_boxplot() +
  scale_x_log10() +
  labs(title = "", y = "", fill = "") +
  theme(legend.position = "none")
# 2020 scatter plot of the first indicator against the second, both axes on a
# log10 scale, points coloured by region, with one linear fit over all points.
df_yourdata |>
  filter(year == 2020) |>
  drop_na(short_name_2, short_name_1) |>
  ggplot(aes(short_name_2, short_name_1)) +
  geom_point(aes(col = region)) +
  geom_smooth(method = "lm", formula = 'y~x', se = FALSE) +
  scale_x_log10() +
  scale_y_log10() +
  labs(title = "", x = "", y = "")
Observations and Questions:
# Linear regression of log10(short_name_1) on log10(short_name_2), 2020 rows.
df_yourdata |>
  filter(year == 2020) |>
  drop_na(short_name_2, short_name_1) |>
  lm(log10(short_name_1) ~ log10(short_name_2), data = _) |>
  summary()
Call:
lm(formula = log10(short_name_1) ~ log10(short_name_2), data = drop_na(filter(df_yourdata,
year == 2020), short_name_2, short_name_1))
Residuals:
Min 1Q Median 3Q Max
-0.60778 -0.15660 -0.00651 0.16129 0.59437
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.31545 0.13386 -32.24 <2e-16 ***
log10(short_name_2) 1.13831 0.03288 34.62 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.2362 on 228 degrees of freedom
Multiple R-squared: 0.8402, Adjusted R-squared: 0.8395
F-statistic: 1199 on 1 and 228 DF, p-value: < 2.2e-16
Observations and Questions: